Backward trajectories using HYSPLIT

In this section, we will be using HYSPLIT to find out where the air reaching Jakarta is coming from. We’ll combine this with PM 2.5 levels to understand where potential transboundary sources of air pollution lie.

Note that this document is more of an introduction than an actual scientific analysis, as many potential factors and sensitivy analyses won’t be included.

Installing packages

Beside more traditional packages, we weill be using three dedicated packages on air pollution, namely: - RCREA: CREA’s own package to retrieve air pollution levels - Splitr: a convenient wrapper for HYSPLIT calculation - OpenAIR: a very powerful package to analyse air trajectories

if(!require(rcrea)){devtools::install_github("energyandcleanair/rcrea"); require(rcrea)}
if(!require(splitr)){devtools::install_github("rich-iannone/splitr"); require(splitr)}
if(!require(openair)){devtools::install_github('davidcarslaw/openair'); library(openair)}

Other packages used for this tutorial are:

library(lubridate)
library(pbmcapply)
library(dplyr)
library(ggmap)
library(ggplot2)

Backward trajectories of a single date

Let’s take Jakarta and 1 September 2020 as an example.

HYSPLIT/Splitr needs some parameters: - weather dataset used to calculate trajectories - height of the ‘receptor’ - duration of the trajectories

met_type <- "reanalysis" # or gdas1
height <- 10 #meter
duration <- 72 #hours

We also specify the folder where weather data will be cached (can take few GBs), and results temporarily stored.

dir_hysplit_met <- "/Volumes/ext1/data/weather/hysplit_met/" # Folder where weather files will be downloaded/cached
dir_hysplit_output <- here::here("hysplit_output") # Folders where HYSPLIT results will be temporarily stored

Now we can run the backward trajectory calculations!

trajs_at_date <- function(date, met_type){
  splitr::hysplit_trajectory(
      lon = lon,
      lat = lat,
      height = height,
      duration = duration,
      days = lubridate::date(date),
      daily_hours = c(0, 6, 12, 18), # The weather dataset is available every 6-hours
      direction = "backward",
      met_type = met_type,
      extended_met = F,
      met_dir = dir_hysplit_met,
      exec_dir = dir_hysplit_output,
      clean_up = F
  )
}

trajs <- trajs_at_date(date, met_type)

The trajs dataframe contains the trajectories. Let’s plot them.

plot_trajs <- function(trajs){
  
  # Get the boundary box: 50km around trajectories
  buffer_km <- 50
  bbox <- sf::st_as_sf(trajs, coords=c("lon","lat"), crs=4326) %>%
    sf::st_transform(crs=3857) %>% # Reproject in pseudo-mercator to have meter unit
    sf::st_buffer(buffer_km*1E3) %>%
    sf::st_transform(crs=4326) %>%
    sf::st_bbox() %>%
    unname()
    
  basemap <- ggmap::get_stamenmap(bbox = bbox,
                                  zoom = 8)
  
  ggmap(basemap) +
     coord_cartesian() +
      # trajectory lines
     geom_path(data = trajs %>%
                 dplyr::arrange(hour_along) %>%
                 mutate(subcluster=paste(traj_dt_i, hour_along %/% 8)),
               arrow = arrow(angle=18, length=unit(0.1,"inches")),
               aes(x = lon, y = lat, group=subcluster), color="darkred", alpha=0.6) +
      geom_path(data = trajs,
               aes(x = lon, y = lat, group=traj_dt_i), color="darkred", alpha=0.6) +
     # theme
     theme(panel.background = element_rect(fill='lightgray'),
           panel.border = element_rect(color='black', fill=NA),
           panel.grid = element_line(color=NA),
           plot.caption = element_text(lineheight = 0.9),
           legend.position="right",
           legend.key = element_rect(fill='white')) +
     labs(title=paste("Sources or air flowing into Jakarta"),
          subtitle = unique(lubridate::date(trajs$date)),
          x='', y='',
          caption=paste0("Source: CREA based on HYSPLIT. ",
                         "Weather dataset: ", met_type, " ",
                         "Receptor height: ", height, "m. ",
                         "Duration: ", duration, " hour."))
   
}
plot_trajs(trajs)

Using another weather dataset:

trajs_at_date(date, met_type="gdas1") %>%
  plot_trajs()

We can repeat this at other dates.

trajs_at_date("2019-01-01", met_type) %>%
  plot_trajs()

trajs_at_date("2019-01-01", met_type="gdas1") %>%
  plot_trajs()

Trajectories over a longer period: 2019

We might be interested in trajectories over a longer period of time. Here we’ll consider the whole of 2019.

date_from <- "2019-01-01"
date_to <- "2019-12-31"
dates <- seq(lubridate::date(date_from), lubridate::date(date_to), by="d")
dates_split <- split(dates, ceiling(seq_along(dates)/5)) # Compute 5 days at time per core

trajs_at_dates <- function(dates){
  tryCatch({
    hysplit_trajectory(
      lon = lon,
      lat = lat,
      height = height,
      duration = duration,
      days = dates,
      daily_hours = c(0, 6, 12, 18),
      direction = "backward",
      met_type = met_type,
      extended_met = F,
      met_dir = dir_hysplit_met,
      exec_dir = dir_hysplit_output,
      clean_up = F
    ) 
  },
  error=function(c){
    return(NA)
  })
}

trajs.2019 <- do.call('rbind',
                 pbmclapply(dates_split, trajs_at_dates,
                            mc.cores = parallel::detectCores()-1))

We first need to make trajectories compatible with openair (e.g. rename or add certain fields):

format_for_openair <- function(trajs){
   # Update fields to be compatible with OpenAIR
  trajs$hour.inc <- trajs$hour_along
  trajs$date <- trajs$traj_dt_i
  trajs$date2 <- trajs$traj_dt
  trajs$year <- lubridate::year(trajs$traj_dt_i)
  trajs$month <- lubridate::month(trajs$traj_dt_i)
  trajs$day <- lubridate::date(trajs$traj_dt_i)

  return(trajs)
}

trajs.2019 <- format_for_openair(trajs.2019)

Let’s also add PM2.5 values, using CREA R package.

pm25.2019 <- rcrea::measurements(city="jakarta", source="openaq", date_from=date_from, date_to=date_to, poll=rcrea::PM25) %>%
  mutate(date=lubridate::date(date))
trajs.poll.2019 <- trajs.2019 %>% left_join(pm25.2019, by=c("day"="date"))

Plotting monthly trajectories, colored by PM2.5 levels

plt.months <- trajPlot(
  trajs.poll.2019,
  pollutant = "value",
  type = "month",
  map = TRUE,
  group = NA,
  map.fill = TRUE,
  map.res = "default",
  map.cols = "grey40",
  map.alpha = 0.4,
  projection = "mercator",
  parameters = NULL,
  orientation = c(90, 0, 0),
  grid.col = "transparent",
  npoints = 12,
  origin = TRUE
)

Zooming in on a month:

plot_trajs_month <- function(trajs, month){
  plt.month <- trajPlot(
    trajs %>% filter(lubridate::month(date)==!!month),
    pollutant = "value",
    type = "default",
    map = TRUE,
    group = NA,
    map.fill = TRUE,
    map.res = "default",
    map.cols = "grey40",
    map.alpha = 0.4,
    projection = "mercator",
    parameters = NULL,
    orientation = c(90, 0, 0),
    grid.col = "transparent",
    npoints = 12,
    origin = TRUE
  )
}

plot_trajs_month(trajs.poll.2019, 8)

plot_trajs_month(trajs.poll.2019, 1)

Let’s overlay these trajectories with industries.

We’re using industry locations from PROPER programme.

proper <- read.csv(file.path('../data/proper_industries.csv')) %>%
  mutate(Sector=recode(Sector,
         `Petroleum, Chemicals & Plastics`="Petrochemicals & Plastics")) %>%
  filter(Sector %in% c("Metal & non-metallic minerals",
             "Petrochemicals & Plastics",
             "Oil & Gas refining",
             "Power & Energy",
             "Cement, Steel & heavy industry",
             "Glass"
             ))
plot_trajs_industries <- function(trajs, industries, subtitle){
  
  # Get the boundary box: 50km around trajectories
  buffer_km <- 100
  bbox <- sf::st_as_sf(trajs, coords=c("lon","lat"), crs=4326) %>%
    sf::st_transform(crs=3857) %>% # Reproject in pseudo-mercator to have meter unit
    sf::st_buffer(buffer_km*1E3) %>%
    sf::st_transform(crs=4326) %>%
    sf::st_bbox() %>%
    unname()
    
  basemap <- ggmap::get_stamenmap(bbox = bbox,
                                  zoom = 8,
                                  where="/Volumes/ext1/data/basemap")
  
  ggmap(basemap) +
     coord_cartesian() +
     # trajectory lines
     geom_path(data = trajs %>%
                 dplyr::arrange(hour_along) %>%
                 mutate(subcluster=paste(traj_dt_i, hour_along %/% 8)),
               arrow = arrow(angle=18, length=unit(0.1,"inches")),
               aes(x = lon, y = lat, group=subcluster, color=value),
               alpha=0.6) +
    geom_path(data = trajs %>% filter(value<70),
               aes(x = lon, y = lat, group=traj_dt_i, color=value), alpha=0.6) +
    geom_path(data = trajs %>% filter(value>=70),
               aes(x = lon, y = lat, group=traj_dt_i, color=value), alpha=0.6) +

    # Industry
    geom_point(data=industries, inherit.aes = F, aes(x=longitude,y=latitude, shape=Sector, fill=Sector),
               alpha=0.8, position="jitter") +
     # theme
    scale_shape_manual(name="Sector", values=c(15,16,17,18,19,20)) +
    scale_fill_brewer(name="Sector", palette="Dark2") +
    scale_color_gradientn(colours = c("cyan","yellow","red")) +
    theme(panel.background = element_rect(fill='lightgray'),
           panel.border = element_rect(color='black', fill=NA),
           panel.grid = element_line(color=NA),
           plot.caption = element_text(lineheight = 0.9),
           legend.position="right",
           legend.key = element_rect(fill='white')) +
    labs(title=paste("Sources or air flowing into Jakarta"),
          subtitle = subtitle,
          x='', y='',
          caption=paste0("Source: CREA based on PROPER and HYSPLIT. ",
                         "Weather dataset: ", met_type, ". ",
                         "Receptor height: ", height, "m. ",
                         "Duration: ", duration, " hour."))
   
}
selected_month <- 8
plot_trajs_industries(
  trajs=trajs.poll.2019 %>% filter(lubridate::month(date)==selected_month),
  industries=proper,
  subtitle=paste("Month:", selected_month))

Zooming in:

plot_trajs_industries(
  trajs=trajs.poll.2019 %>% filter(lubridate::month(date)==selected_month),
  industries=proper,
  subtitle=paste("Month:", selected_month)) +
coord_cartesian(xlim=c(106.5,108), ylim=c(-8,-6)) 

plot_trajs_industries(
  trajs=trajs.poll.2019 %>% filter(lubridate::month(date)==1),
  industries=proper,
  subtitle=paste("Month:", 1))

OpenAIR package offers many interesting plotting and analysis fonctionalities: read here

For instance, we can identify the most representative air trajectories:

plot_traj_clusters <- function(trajs, n_clusters=4, statistics="frequency"){
  
  openair::trajCluster(
    trajs.2019,
    method = "Euclid",
    n.cluster = n_clusters,
    plot = T,
    type = "default",
    cols = "Set1",
    split.after = FALSE,
    map.fill = TRUE,
    map.cols = "grey40",
    map.alpha = 0.4,
    projection = "mercator",
    parameters = NULL,
    orientation = c(90, 0, 0),
    by.type = FALSE,
    origin = TRUE
  )
}

(plt.trajs.2019 <- plot_traj_clusters(trajs.2019))